In [1]:
import re
from io import StringIO

import pandas as pd
from bokeh.charts import Bar, TimeSeries
from bokeh.io import output_notebook, show
# Configure Bokeh so that subsequent show() calls render inline in the notebook.
output_notebook()
In [2]:
# Closed-issue counts per Bokeh milestone, embedded as CSV text.
# The second column holds a label such as "14 closed", not a bare number.
issues = """
milestone,n closed
0.1,"2 closed"
"0.5.1","4 closed"
0.3,"14 closed"
0.4,"15 closed"
"0.6.1","15 closed"
0.2,"16 closed"
"0.8.1","21 closed"
0.6,"21 closed"
0.5,"35 closed"
"0.8.2","71 closed"
0.9,"93 closed"
"0.9.1","105 closed"
"0.7.1","117 closed"
0.7,"144 closed"
0.8,"147 closed"
"""

# Milestones are version labels, so read them as text: this keeps "0.1" and
# "0.5.1" the same type regardless of which labels happen to look numeric.
# `DataFrame.sort` no longer exists in pandas; `sort_values` is its replacement.
issues_df = (
    pd.read_csv(StringIO(issues), dtype={'milestone': str})
    .sort_values('milestone')
    .set_index('milestone')
)

# Strip the " closed" suffix to obtain the integer count used for plotting.
issues_df['n'] = issues_df['n closed'].str.split(' closed').str[0].astype(int)
issues_df.head()
Out[2]:
In [3]:
# Bar chart of the number of issues closed in each milestone.
issues_chart = Bar(
    issues_df[['n']],
    title='Issues closed by milestone',
    xlabel='milestone',
    ylabel='# issues closed',
    tools='previewsave',
)
show(issues_chart)
In [4]:
# Data from pypi vanity: one row per released artifact with its upload date
# and download count, separated by single spaces.
pypi = """
package date count
bokeh-0.4.2p1.tar.gz 2014-03-13 2973
bokeh-0.2.tgz 2013-10-25 2414
bokeh-0.3.tar.gz 2013-11-19 4110
bokeh-0.4.tar.gz 2014-02-04 2495
bokeh-0.4.1.tar.gz 2014-02-19 2817
bokeh-0.4.4.zip 2014-04-16 1716
bokeh-0.4.4.tar.gz 2014-04-16 4703
bokeh-0.5.0.zip 2014-07-08 1333
bokeh-0.5.0.tar.gz 2014-07-08 2051
bokeh-0.5.1.zip 2014-07-23 1405
bokeh-0.5.1.tar.gz 2014-07-23 2438
bokeh-0.5.2.zip 2014-08-15 1189
bokeh-0.5.2.tar.gz 2014-08-15 2265
bokeh-0.6.0.zip 2014-09-10 1208
bokeh-0.6.0.tar.gz 2014-09-09 2018
bokeh-0.6.1.zip 2014-09-25 1604
bokeh-0.6.1.tar.gz 2014-09-25 5939
bokeh-0.7.0.zip 2014-12-05 832
bokeh-0.7.0.tar.gz 2014-12-05 3675
bokeh-0.7.1.zip 2015-01-12 668
bokeh-0.7.1.tar.gz 2015-01-12 2292
bokeh-0.8.0.zip 2015-02-16 451
bokeh-0.8.0.tar.gz 2015-02-16 870
bokeh-0.8.1.zip 2015-02-23 654
bokeh-0.8.1.tar.gz 2015-02-23 3457
bokeh-0.8.2.zip 2015-03-25 692
bokeh-0.8.2.tar.gz 2015-03-25 6601
bokeh-0.9.0.zip 2015-05-15 553
bokeh-0.9.0.tar.gz 2015-05-15 16109
bokeh-0.9.1.zip 2015-07-04 206
bokeh-0.9.1.tar.gz 2015-07-04 1632
"""

# The regex separator requires the Python parsing engine.
# `DataFrame.sort` no longer exists in pandas; `sort_values` is its replacement.
# A stable sort keeps artifacts that share a date in their listed order, so the
# displayed head of the frame is reproducible.
pypi_df = pd.read_table(
    StringIO(pypi), sep=r' +', engine='python', parse_dates=['date']
).sort_values('date', kind='mergesort')
# Captures the version between the "bokeh-" prefix and a known archive
# extension. The dots of the extension are escaped so that they only match a
# literal "." rather than any character.
_PACKAGE_VERSION_RE = re.compile(
    r'bokeh-(?P<version>.+)(?:\.tar\.gz|\.zip|\.tgz)'
)


def get_version(package_string):
    """Extract the version from a Bokeh archive file name.

    Parameters
    ----------
    package_string : str
        File name such as ``'bokeh-0.8.1.tar.gz'`` or ``'bokeh-0.2.tgz'``.

    Returns
    -------
    str
        The version portion, e.g. ``'0.8.1'``.

    Raises
    ------
    ValueError
        If the name does not contain ``bokeh-<version>`` followed by one of
        the ``.tar.gz``, ``.zip`` or ``.tgz`` extensions.
    """
    match = _PACKAGE_VERSION_RE.search(package_string)
    if match is None:
        # Fail with a message naming the offending value instead of an
        # attribute error on None.
        raise ValueError(
            'cannot extract a Bokeh version from %r' % (package_string,)
        )
    return match.group('version')
# Derive a version label for every artifact from its file name.
pypi_df['version'] = pypi_df['package'].map(get_version)
pypi_df.head()
Out[4]:
In [5]:
# Total downloads per version, combining all artifacts of that version.
# Only the numeric `count` column is summed: aggregating the whole frame would
# also try to sum the `package` strings and `date` timestamps.
pypi_by_version = pypi_df.groupby('version')['count'].sum().reset_index()

# Attach the dates back to each version, then keep a single row per version
# (the first one produced by the merge).
pypi_version_date = (
    pypi_by_version
    .merge(pypi_df[['date', 'version']], on='version')
    .drop_duplicates('version')
)
pypi_version_date.head()
Out[5]:
In [6]:
# Plot version: download totals indexed by version label.
plot_version = pypi_version_date[['version', 'count']].set_index('version')

version_chart = Bar(
    plot_version,
    title='Pypi downloads by version',
    xlabel='version',
    ylabel='pypi downloads',
    tools='previewsave',
)
show(version_chart)
In [7]:
# Plot date: download totals indexed by the date kept for each version.
plot_date = pypi_version_date[['date', 'count']].set_index('date')

date_chart = TimeSeries(
    plot_date,
    title='Pypi downloads by date',
    xlabel='date',
    ylabel='pypi downloads',
    tools='previewsave',
)
show(date_chart)
In [ ]: